* Move to the project directory held in the ohie global (defined outside this
* file) and close any log that is still open; capture suppresses the error
* raised when no log is open.
cd $ohie
capture log close

* Store today's date as a YYMMDD stamp in the sysdate global and open a log
* file named after it, overwriting an existing log with the same name.
global sysdate : display %tdYYNNDD date(c(current_date), "DMY")
quietly log using "./logs/extrapolation_late_$sysdate.log", replace

* Reset timer 1 and start it; it is read at the very end of this do-file.
timer clear 1
timer on 1

/*----------------------------------------------------------------------*/
/* PROGRAM: extrapolation_late.do					*/
/*									*/
/* PURPOSE:								*/
/* [*]	This code uses the MTO(p), MUO(p), MTE(p) estimated in the OHIE	*/
/*	1 lottery entrant sample for each respective bootstrap iteration*/
/*	to estimate the LATE in the BRFSS data using the pB, pI, and	*/
/*	s(pI) estimated in the BRFSS data using frequency weights. The	*/
/*	pB, pI, and s(pI) are also bootstrapped and therefore it is of	*/
/*	utmost importance to extract the MTO(p), MUO(p), and MTE(p)	*/
/*	information from the OHIE for the appropriate bootstrap		*/
/*	replication. The output from this code is used for plotting	*/
/*	"MA LATE" on the MTE(p) line of the				*/
/*	"extrapolation_or_ma_Y_num" figure in the paper.		*/
/*									*/
/* OUTPUT:								*/
/* [*]	extrapolation_late: This data set contains the OHIE 	*/
/*	LATE for the sample of 1 lottery entrant, the BRFSS LATE, and	*/
/*	the difference between the two.					*/
/*									*/
/*----------------------------------------------------------------------*/

*************************************************
* SESSION SETTINGS				*
*************************************************

* New numeric variables default to double precision, output never pauses
* for --more-- (this setting is stored permanently), and any data currently
* in memory is dropped.
set type double
set more off, permanently
clear

*************************************************
* SEEDS						*
*************************************************

* One seed per outcome, named <outcome>_seed so that the outcome loop can
* look it up from the outcome name.
global Y_num_seed 6574358

*************************************************
* VARIABLES, CONTROLS, AND FILES		*
*************************************************

* Outcome variables to loop over
local Ys Y_num

* Number of bootstrap replications.
* Replication 1 is never re-sampled: it always runs on the full original
* sample. The re-sampling done in this file only concerns the BRFSS LATE.
* The OHIE LATE was bootstrapped beforehand, so the estimated and
* bootstrapped MTE(p) functions are reused to estimate the LATE in the
* BRFSS data.
local reps 1001

* Name of the endogenous variable in the BRFSS data
local D D

* Name of the instrument in the BRFSS data
local Z Z

* Name of the frequency-weight variable in the BRFSS data. The weights are
* essential for the extrapolation to Massachusetts, so this must be set.
local wt w

* BRFSS input data set
local infile_brfss "$final/brfss.dta"

* Name of the output (Excel) file handed to bootstrap_statistics
local out_file extrapolation_late

*********************************************
* RUN CODE  	*
*********************************************	

* Loop over outcomes. For each outcome this block
*   (1) loads the previously bootstrapped OHIE estimates once,
*   (2) for every replication re-samples the BRFSS data (except replication 1),
*       computes pB, pI, and s(pI), and calls treatment_effects with the OHIE
*       MTO(p), MUO(p), and MTE(p) of the same replication,
*   (3) stores the BRFSS and OHIE LATEs, their difference, and the bootstrap
*       statistics.
foreach Y of local Ys {

	* Set the seed for the specific outcome
	set seed $`Y'_seed

	* Names given to the two columns of the output matrix once it has
	* been turned into variables (column 1, then column 2)
	local renamevars "LATE_brfss LATE_ohie"

	* Output matrix: one row per replication, column 1 holds the BRFSS
	* LATE and column 2 holds the OHIE LATE
	matrix brfss_treat_eff_`Y' = J(`reps',2,.)

	*********************************************
	* IMPORT THE OREGON DATA	  	  *
	*********************************************
	* We use the MTO(p), MUO(p), MTE(p), and LATE estimated in the code
	* linmte_no_covars_treat_eff.do (linear MTE without covariates, OHIE
	* 1 lottery entrant sample). Row r of the file is used for bootstrap
	* replication r. The file does not change between replications, so it
	* is read and converted to a matrix once per outcome rather than once
	* per replication.
	use "$final/treat_eff_boot_`Y'.dta", clear

	* We do not actually need the MTO(p) and MUO(p) functions, but the
	* treatment_effects command requires them as arguments.
	mkmat MTO_intercept MTO_slope MUO_intercept MUO_slope MTE_intercept MTE_slope LATE, matrix(ohie_data)

	* Every replication reads its own row of ohie_data, so stop with a
	* clear message if the OHIE file holds fewer rows than replications
	* requested, instead of failing on a matrix subscript inside the loop.
	if rowsof(ohie_data) < `reps' {
		di as error "treat_eff_boot_`Y'.dta has fewer rows than the `reps' replications requested"
		exit 459
	}

	* Column number of each component in ohie_data
	local ests "MTO_intercept MTO_slope MUO_intercept MUO_slope MTE_intercept MTE_slope LATE"

	foreach e of local ests {
		local `e'_col = colnumb(ohie_data, "`e'")
	}

	di "BEGIN BOOTSTRAP LOOP"

	* Begin bootstrapping loop
	forval rep = 1/`reps' {

		* Fresh 1 x 2 matrices (intercept, slope) for this replication
		matrix MTO_matrix = J(1,2,.)
		matrix MUO_matrix = J(1,2,.)
		matrix MTE_matrix = J(1,2,.)

		* Populate them with the OHIE values of the matching
		* bootstrap replication

		* MTO(p)
		matrix MTO_matrix[1,1] = ohie_data[`rep', `MTO_intercept_col']
		matrix MTO_matrix[1,2] = ohie_data[`rep', `MTO_slope_col']

		* MUO(p)
		matrix MUO_matrix[1,1] = ohie_data[`rep', `MUO_intercept_col']
		matrix MUO_matrix[1,2] = ohie_data[`rep', `MUO_slope_col']

		* MTE(p)
		matrix MTE_matrix[1,1] = ohie_data[`rep', `MTE_intercept_col']
		matrix MTE_matrix[1,2] = ohie_data[`rep', `MTE_slope_col']

		matrix list MTE_matrix

		* Save the OHIE LATE of this replication as a scalar
		scalar define sc_LATE_ohie = ohie_data[`rep', `LATE_col']

	*********************************************
	* EXTRAPOLATE TO THE BRFSS DATA	   *
	*********************************************

		* Reload the original BRFSS data so that each replication
		* re-samples from the full sample
		use "`infile_brfss'", clear

		* Re-sample for bootstrapping; replication 1 keeps the
		* original, non-bootstrapped sample
		quietly if `rep' > 1 {
			bsample
		}

		* Compute the pB, pI, and s(pI) values in the BRFSS data,
		* weighted by the weight variable

		* pB = P(D=1|Z=0)
		su `D' if `Z'==0 [aweight=`wt']
		local pB = `r(mean)'

		* pI = P(D=1|Z=1)
		su `D' if `Z'==1 [aweight=`wt']
		local pI = `r(mean)'

		* s(pI) = P(Z=1)
		su `Z' [aweight=`wt']
		local s_pI = `r(mean)'

		* Calculate the treatment effects using the MTO(p), MUO(p), and
		* MTE(p) from the OHIE 1 lottery sample but using the BRFSS pB,
		* pI, and s(pI) values.
		treatment_effects MTE_matrix MTO_matrix MUO_matrix `pB' `pI' `s_pI'

		* Save the Massachusetts and Oregon LATEs in the output matrix.
		* NOTE(review): sc_LATE is not defined in this file; presumably
		* it is set by treatment_effects - confirm.
		matrix brfss_treat_eff_`Y'[`rep', 1] = sc_LATE
		matrix brfss_treat_eff_`Y'[`rep', 2] = sc_LATE_ohie

	} // end bootstrapping loop

	di	 "END BOOTSTRAP LOOP"

	* Turn the output matrix into variables of the data set in memory
	svmat double brfss_treat_eff_`Y'

	* Delete extra variables and observations: keep only the new
	* variables and the first reps rows
	keep brfss_treat_eff_*
	drop if _n > `reps'

	* Rename the variables created from the output matrix, in column order
	local i=1
	foreach var of local renamevars {
		rename brfss_treat_eff_`Y'`i' `var'
		local ++i
	}

	* Calculate differences between the BRFSS and OHIE LATEs
	gen diff_LATES = LATE_ohie - LATE_brfss

	* Output the bootstrap statistics
	bootstrap_statistics $output `out_file' `Y'

	* Save the bootstrapped file
	save "$final/brfss_treat_eff_boot_`Y'.dta", replace

} // end outcomes loop	
	
* Stop timer 1 and list it, which leaves the elapsed seconds in r(t1)
timer off 1
timer list 1

* Convert the elapsed seconds to hours and report them
local elapsed_hrs = `r(t1)'/3600
display "Computing time is `elapsed_hrs' hours"

* Close the log opened at the top of this do-file
quietly log close

